{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#hide\n",
    "#skip\n",
    "! [ -e /content ] && pip install -Uqq fastai  # upgrade fastai on colab"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "from fastai.torch_basics import *\n",
    "from fastai.data.all import *\n",
    "from fastai.tabular.core import *\n",
    "try: import cudf,nvcategory\n",
    "except: print(\"This requires rapids, see https://rapids.ai/ for installation details\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#hide\n",
    "from nbdev.showdoc import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#default_exp tabular.rapids"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Tabular with rapids\n",
    "\n",
    "> Basic functions to preprocess tabular data before assembling it in a `DataLoaders` on the GPU."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "@patch\n",
    "def __array__(self:cudf.DataFrame): return self.pandas().__array__()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "class TabularGPU(Tabular):\n",
    "    def transform(self, cols, f):\n",
    "        for c in cols: self[c] = f(self[c])\n",
    "\n",
    "    def __getattr__(self,k):\n",
    "        if isinstance(self.items, cudf.DataFrame) and k in self.items.columns: return self.items[k]\n",
    "        return super().__getattr__(k)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## TabularProcessors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def _to_str(c): return c if c.dtype == \"object\" else c.astype(\"str\")\n",
    "def _remove_none(c):\n",
    "    if None in c: c.remove(None)\n",
    "    return c"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "@Categorify\n",
    "def setups(self, to: TabularGPU):\n",
    "    self.lbls = {n: nvcategory.from_strings(_to_str(to.iloc[:,n]).data).keys() for n in to.all_cat_names}\n",
    "    self.classes = {n: CategoryMap(_remove_none(c.to_host()), add_na=(n in to.cat_names)) for n,c in self.lbls.items()}\n",
    "\n",
    "@patch\n",
    "def _apply_cats_gpu(self: Categorify, c):\n",
    "    return cudf.Series(nvcategory.from_strings(_to_str(c).data).set_keys(self.lbls[c.name]).values()).add(add)\n",
    "\n",
    "@Categorify\n",
    "def encodes(self, to: TabularGPU):\n",
    "    def _apply_cats_gpu(add, c):\n",
    "        return cudf.Series(nvcategory.from_strings(_to_str(c).data).set_keys(self.lbls[c.name]).values()).add(add)\n",
    "    to.transform(to.cat_names, partial(_apply_cats_gpu, 1))\n",
    "    to.transform(L(to.cat_y),  partial(_apply_cats_gpu, 0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = cudf.from_pandas(pd.DataFrame({'a':[0,1,2,0,2]}))\n",
    "to = TabularGPU(df, Categorify, 'a')\n",
    "cat = to.procs.categorify\n",
    "test_eq(list(cat['a']), ['#na#','0','1','2'])\n",
    "test_eq(to.a.to_array(), np.array([1,2,3,1,3]))\n",
    "df1 = cudf.from_pandas(pd.DataFrame({'a':[1,0,3,-1,2]}))\n",
    "to1 = to.new(df1)\n",
    "cat(to1)\n",
    "#Values that weren't in the training df are sent to 0 (na)\n",
    "test_eq(to1.a.to_array(), np.array([2,1,0,0,3]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Test decode\n",
    "to2 = TabularPandas(to1.items.to_pandas(), None, 'a')\n",
    "to2 = cat.decode(to2)\n",
    "test_eq(to2.a, np.array(['1','0','#na#','#na#','2']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = cudf.from_pandas(pd.DataFrame({'a':[0,1,2,3,2]}))\n",
    "to = TabularGPU(df, Categorify, 'a', splits=[[0,1,2], [3,4]])\n",
    "cat = to.procs.categorify\n",
    "test_eq(list(cat['a']), ['#na#','0','1','2'])\n",
    "test_eq(to.a.to_array(), np.array([1,2,3,0,3]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#TODO Categorical (fails for now)\n",
    "#df = cudf.from_pandas(pd.DataFrame({'a':pd.Categorical(['M','H','L','M'], categories=['H','M','L'], ordered=True)}))\n",
    "#to = TabularGPU(df, Categorify, 'a')\n",
    "#cat = to.procs.categorify\n",
    "#test_eq(cat['a'].to_host(), ['H','M','L'])\n",
    "#test_eq(df[\"a\"].to_array(), [2,1,3,2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "@Normalize\n",
    "def setups(self, to: TabularGPU):\n",
    "    self.means = {n: to.iloc[:,n].mean()           for n in to.cont_names}\n",
    "    self.stds  = {n: to.iloc[:,n].std(ddof=0)+1e-7 for n in to.cont_names}\n",
    "\n",
    "@Normalize\n",
    "def encodes(self, to: TabularGPU):\n",
    "    to.transform(to.cont_names, lambda c: (c-self.means[c.name])/self.stds[c.name])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = cudf.from_pandas(pd.DataFrame({'a':[0,1,2,3,4]}))\n",
    "to = TabularGPU(df, Normalize, cont_names='a')\n",
    "norm = to.procs.normalize\n",
    "x = np.array([0,1,2,3,4])\n",
    "m,s = x.mean(),x.std()\n",
    "test_eq(norm.means['a'], m)\n",
    "test_close(norm.stds['a'], s)\n",
    "test_close(to.a.to_array(), (x-m)/s)\n",
    "df1 = cudf.from_pandas(pd.DataFrame({'a':[5,6,7]}))\n",
    "to1 = to.new(df1)\n",
    "norm(to1)\n",
    "test_close(to1.a.to_array(), (np.array([5,6,7])-m)/s)\n",
    "\n",
    "to2 = TabularPandas(to1.items.to_pandas(), None, cont_names='a')\n",
    "to2 = norm.decode(to2)\n",
    "test_close(to2.a, [5,6,7])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = cudf.from_pandas(pd.DataFrame({'a':[0,1,2,3,4]}))\n",
    "to = TabularGPU(df, Normalize, cont_names='a', splits=[[0,1,2], [3,4]])\n",
    "norm = to.procs.normalize\n",
    "\n",
    "x = np.array([0,1,2])\n",
    "m,s = x.mean(),x.std()\n",
    "test_eq(norm.means, {'a': m})\n",
    "test_close(norm.stds['a'], s)\n",
    "test_close(to.a.to_array(), (np.array([0,1,2,3,4])-m)/s)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "@patch\n",
    "def median(self:cudf.Series):\n",
    "    \"Get the median of `self`\"\n",
    "    col = self.dropna().reset_index(drop=True).sort_values()\n",
    "    return col[len(col)//2] if len(col)%2 != 0 else (col[len(col)//2]+col[len(col)//2-1])/2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "col = cudf.Series([0,1,np.nan,1,2,3,4])\n",
    "test_eq(col.median(), 1.5)\n",
    "col = cudf.Series([np.nan,1,np.nan,1,2,3,4])\n",
    "test_eq(col.median(), 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "@patch\n",
    "def idxmax(self:cudf.Series):\n",
    "    \"Return the index of the first occurrence of the max in `self`\"\n",
    "    return self.argsort(ascending=False).index[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "@FillMissing\n",
    "def setups(self, to: TabularGPU):\n",
    "    self.na_dict = {}\n",
    "    for n in to.cont_names:\n",
    "        col = to.iloc[:, n]\n",
    "        if col.isnull().any(): self.na_dict[n] = self.fill_strategy(col, self.fill_vals[n])\n",
    "\n",
    "@FillMissing\n",
    "def encodes(self, to: TabularGPU):\n",
    "    for n in to.cont_names:\n",
    "        if n in self.na_dict:\n",
    "            if self.add_col:\n",
    "                to.items[n+'_na'] = to[n].isnull()\n",
    "                if n+'_na' not in to.cat_names: to.cat_names.append(n+'_na')\n",
    "            to[n] = to[n].fillna(self.na_dict[n])\n",
    "        elif df[n].isnull().any():\n",
    "            raise Exception(f\"nan values in `{n}` but not in setup training set\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fill1,fill2,fill3 = (FillMissing(fill_strategy=s) \n",
    "                     for s in [FillStrategy.median, FillStrategy.constant, FillStrategy.mode])\n",
    "df = cudf.from_pandas(pd.DataFrame({'a':[0,1,np.nan,1,2,3,4]}))\n",
    "df1 = df.copy(); df2 = df.copy()\n",
    "tos = TabularGPU(df, fill1, cont_names='a'),TabularGPU(df1, fill2, cont_names='a'),TabularGPU(df2, fill3, cont_names='a')\n",
    "\n",
    "test_eq(fill1.na_dict, {'a': 1.5})\n",
    "test_eq(fill2.na_dict, {'a': 0})\n",
    "test_eq(fill3.na_dict, {'a': 1.0})\n",
    "\n",
    "for t in tos: test_eq(t.cat_names, ['a_na'])\n",
    "\n",
    "for to_,v in zip(tos, [1.5, 0., 1.]):\n",
    "    test_eq(to_.a.to_array(), np.array([0, 1, v, 1, 2, 3, 4]))\n",
    "    test_eq(to_.a_na.to_array(), np.array([0, 0, 1, 0, 0, 0, 0]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dfa = cudf.from_pandas(pd.DataFrame({'a':[np.nan,0,np.nan]}))\n",
    "tos = [t.new(o) for t,o in zip(tos,(dfa,dfa.copy(),dfa.copy()))]\n",
    "for t in tos: t.process()\n",
    "for to_,v in zip(tos, [1.5, 0., 1.]):\n",
    "    test_eq(to_.a.to_array(), np.array([v, 0, v]))\n",
    "    test_eq(to_.a_na.to_array(), np.array([1, 0, 1]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Tabular Pipelines -"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "procs = [Normalize, Categorify, FillMissing, noop]\n",
    "df = cudf.from_pandas(pd.DataFrame({'a':[0,1,2,1,1,2,0], 'b':[0,1,np.nan,1,2,3,4]}))\n",
    "to = TabularGPU(df, procs, cat_names='a', cont_names='b')\n",
    "\n",
    "#Test setup and apply on df_trn\n",
    "test_eq(to.a.to_array(), [1,2,3,2,2,3,1])\n",
    "test_eq(to.b_na.to_array(), [1,1,2,1,1,1,1])\n",
    "x = np.array([0,1,1.5,1,2,3,4])\n",
    "m,s = x.mean(),x.std()\n",
    "test_close(to.b.to_array(), (x-m)/s)\n",
    "test_eq(to.procs.classes, {'a': ['#na#','0','1','2'], 'b_na': ['#na#','False','True']})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Test apply on y_names\n",
    "procs = [Normalize, Categorify, FillMissing, noop]\n",
    "df = cudf.from_pandas(pd.DataFrame({'a':[0,1,2,1,1,2,0], 'b':[0,1,np.nan,1,2,3,4], 'c': ['b','a','b','a','a','b','a']}))\n",
    "to = TabularGPU(df, procs, cat_names='a', cont_names='b', y_names='c')\n",
    "\n",
    "test_eq(to.cat_names, ['a', 'b_na'])\n",
    "test_eq(to.a.to_array(), [1,2,3,2,2,3,1])\n",
    "test_eq(to.b_na.to_array(), [1,1,2,1,1,1,1])\n",
    "test_eq(to.c.to_array(), [1,0,1,0,0,1,0])\n",
    "x = np.array([0,1,1.5,1,2,3,4])\n",
    "m,s = x.mean(),x.std()\n",
    "test_close(to.b.to_array(), (x-m)/s)\n",
    "test_eq(to.procs.classes, {'a': ['#na#','0','1','2'], 'b_na': ['#na#','False','True'], 'c': ['a','b']})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "procs = [Normalize, Categorify, FillMissing, noop]\n",
    "df = cudf.from_pandas(pd.DataFrame({'a':[0,1,2,1,1,2,0], 'b':[0,1,np.nan,1,2,3,4], 'c': ['b','a','b','a','a','b','a']}))\n",
    "to = TabularGPU(df, procs, cat_names='a', cont_names='b', y_names='c')\n",
    "\n",
    "test_eq(to.cat_names, ['a', 'b_na'])\n",
    "test_eq(to.a.to_array(), [1,2,3,2,2,3,1])\n",
    "test_eq(to.a.dtype,int)\n",
    "test_eq(to.b_na.to_array(), [1,1,2,1,1,1,1])\n",
    "test_eq(to.c.to_array(), [1,0,1,0,0,1,0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "procs = [Normalize, Categorify, FillMissing, noop]\n",
    "df = cudf.from_pandas(pd.DataFrame({'a':[0,1,2,1,1,2,0], 'b':[0,np.nan,1,1,2,3,4], 'c': ['b','a','b','a','a','b','a']}))\n",
    "to = TabularGPU(df, procs, cat_names='a', cont_names='b', y_names='c', splits=[[0,1,4,6], [2,3,5]])\n",
    "\n",
    "test_eq(to.cat_names, ['a', 'b_na'])\n",
    "test_eq(to.a.to_array(), [1,2,2,1,0,2,0])\n",
    "test_eq(to.a.dtype,int)\n",
    "test_eq(to.b_na.to_array(), [1,2,1,1,1,1,1])\n",
    "test_eq(to.c.to_array(), [1,0,0,0,1,0,1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "from torch.utils.dlpack import from_dlpack\n",
    "\n",
    "@ReadTabBatch\n",
    "def encodes(self, to: TabularGPU):\n",
    "    return from_dlpack(to.cats.to_dlpack()).long(),from_dlpack(to.conts.to_dlpack()).float(), from_dlpack(to.targ.to_dlpack()).long()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Integration example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<cudf.DataFrame ncols=15 nrows=5 >"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "path = untar_data(URLs.ADULT_SAMPLE)\n",
    "df = cudf.from_pandas(pd.read_csv(path/'adult.csv'))\n",
    "df_trn,df_tst = df.iloc[:10000].copy(),df.iloc[10000:].copy()\n",
    "df_trn.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']\n",
    "cont_names = ['age', 'fnlwgt', 'education-num']\n",
    "procs = [Categorify, FillMissing, Normalize]\n",
    "\n",
    "splits = RandomSplitter()(range_of(df_trn))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 755 ms, sys: 36.7 ms, total: 792 ms\n",
      "Wall time: 798 ms\n"
     ]
    }
   ],
   "source": [
    "%time to = TabularGPU(df_trn, procs, splits=splits, cat_names=cat_names, cont_names=cont_names, y_names=\"salary\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "splits = [list(range(len(splits[0]))), list(range(len(splits[0]), 10000))]\n",
    "dsets = Datasets(to, splits=splits, tfms=[None])\n",
    "dl = TabDataLoader(to.valid, bs=64, num_workers=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/sgugger/anaconda3/lib/python3.7/site-packages/cudf/io/dlpack.py:83: UserWarning: WARNING: cuDF to_dlpack() produces column-major (Fortran order) output. If the output tensor needs to be row major, transpose the output of this function.\n",
      "  return cpp_dlpack.to_dlpack(gdf_cols)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>fnlwgt</th>\n",
       "      <th>education-num</th>\n",
       "      <th>workclass</th>\n",
       "      <th>education</th>\n",
       "      <th>marital-status</th>\n",
       "      <th>occupation</th>\n",
       "      <th>relationship</th>\n",
       "      <th>race</th>\n",
       "      <th>education-num_na</th>\n",
       "      <th>salary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>44.0</td>\n",
       "      <td>282721.999450</td>\n",
       "      <td>15.0</td>\n",
       "      <td>Self-emp-not-inc</td>\n",
       "      <td>Prof-school</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Protective-serv</td>\n",
       "      <td>Husband</td>\n",
       "      <td>White</td>\n",
       "      <td>False</td>\n",
       "      <td>&gt;=50k</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>18.0</td>\n",
       "      <td>116528.002955</td>\n",
       "      <td>10.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>Some-college</td>\n",
       "      <td>Never-married</td>\n",
       "      <td>Exec-managerial</td>\n",
       "      <td>Not-in-family</td>\n",
       "      <td>White</td>\n",
       "      <td>False</td>\n",
       "      <td>&lt;50k</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>52.0</td>\n",
       "      <td>253783.997089</td>\n",
       "      <td>7.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>11th</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>Priv-house-serv</td>\n",
       "      <td>Unmarried</td>\n",
       "      <td>White</td>\n",
       "      <td>False</td>\n",
       "      <td>&lt;50k</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>39.0</td>\n",
       "      <td>175231.999649</td>\n",
       "      <td>10.0</td>\n",
       "      <td>Federal-gov</td>\n",
       "      <td>Some-college</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Machine-op-inspct</td>\n",
       "      <td>Husband</td>\n",
       "      <td>White</td>\n",
       "      <td>True</td>\n",
       "      <td>&gt;=50k</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>44.0</td>\n",
       "      <td>36271.003439</td>\n",
       "      <td>13.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Exec-managerial</td>\n",
       "      <td>Husband</td>\n",
       "      <td>White</td>\n",
       "      <td>False</td>\n",
       "      <td>&lt;50k</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>53.0</td>\n",
       "      <td>196277.999985</td>\n",
       "      <td>10.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>Some-college</td>\n",
       "      <td>Widowed</td>\n",
       "      <td>Tech-support</td>\n",
       "      <td>Not-in-family</td>\n",
       "      <td>White</td>\n",
       "      <td>False</td>\n",
       "      <td>&lt;50k</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>29.0</td>\n",
       "      <td>150860.998472</td>\n",
       "      <td>10.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>Some-college</td>\n",
       "      <td>Never-married</td>\n",
       "      <td>Armed-Forces</td>\n",
       "      <td>Not-in-family</td>\n",
       "      <td>White</td>\n",
       "      <td>False</td>\n",
       "      <td>&lt;50k</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>39.0</td>\n",
       "      <td>139647.001399</td>\n",
       "      <td>10.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>Some-college</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>Farming-fishing</td>\n",
       "      <td>Unmarried</td>\n",
       "      <td>White</td>\n",
       "      <td>False</td>\n",
       "      <td>&lt;50k</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>49.0</td>\n",
       "      <td>481986.987541</td>\n",
       "      <td>9.0</td>\n",
       "      <td>?</td>\n",
       "      <td>HS-grad</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Adm-clerical</td>\n",
       "      <td>Husband</td>\n",
       "      <td>White</td>\n",
       "      <td>False</td>\n",
       "      <td>&lt;50k</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>48.0</td>\n",
       "      <td>205423.999545</td>\n",
       "      <td>13.0</td>\n",
       "      <td>Private</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>Tech-support</td>\n",
       "      <td>Unmarried</td>\n",
       "      <td>White</td>\n",
       "      <td>False</td>\n",
       "      <td>&gt;=50k</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "dl.show_batch()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Export -"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Converted 00_test.ipynb.\n",
      "Converted 01_core.ipynb.\n",
      "Converted 01a_utils.ipynb.\n",
      "Converted 01b_dispatch.ipynb.\n",
      "Converted 01c_torch_core.ipynb.\n",
      "Converted 02_script.ipynb.\n",
      "Converted 03_dataloader.ipynb.\n",
      "Converted 04_transform.ipynb.\n",
      "Converted 05_data_core.ipynb.\n",
      "Converted 06_data_transforms.ipynb.\n",
      "Converted 07_vision_core.ipynb.\n",
      "Converted 08_pets_tutorial.ipynb.\n",
      "Converted 09_vision_augment.ipynb.\n",
      "Converted 10_data_block.ipynb.\n",
      "Converted 11_layers.ipynb.\n",
      "Converted 11a_vision_models_xresnet.ipynb.\n",
      "Converted 12_optimizer.ipynb.\n",
      "Converted 13_learner.ipynb.\n",
      "Converted 14_callback_schedule.ipynb.\n",
      "Converted 14a_callback_data.ipynb.\n",
      "Converted 15_callback_hook.ipynb.\n",
      "Converted 15a_vision_models_unet.ipynb.\n",
      "Converted 16_callback_progress.ipynb.\n",
      "Converted 17_callback_tracker.ipynb.\n",
      "Converted 18_callback_fp16.ipynb.\n",
      "Converted 19_callback_mixup.ipynb.\n",
      "Converted 20_metrics.ipynb.\n",
      "Converted 21_vision_learner.ipynb.\n",
      "Converted 22_tutorial_imagenette.ipynb.\n",
      "Converted 23_tutorial_transfer_learning.ipynb.\n",
      "Converted 30_text_core.ipynb.\n",
      "Converted 31_text_data.ipynb.\n",
      "Converted 32_text_models_awdlstm.ipynb.\n",
      "Converted 33_text_models_core.ipynb.\n",
      "Converted 34_callback_rnn.ipynb.\n",
      "Converted 35_tutorial_wikitext.ipynb.\n",
      "Converted 36_text_models_qrnn.ipynb.\n",
      "Converted 37_text_learner.ipynb.\n",
      "Converted 38_tutorial_ulmfit.ipynb.\n",
      "Converted 40_tabular_core.ipynb.\n",
      "Converted 41_tabular_model.ipynb.\n",
      "Converted 42_tabular_rapids.ipynb.\n",
      "Converted 50_data_block_examples.ipynb.\n",
      "Converted 60_medical_imaging.ipynb.\n",
      "Converted 90_notebook_core.ipynb.\n",
      "Converted 91_notebook_export.ipynb.\n",
      "Converted 92_notebook_showdoc.ipynb.\n",
      "Converted 93_notebook_export2html.ipynb.\n",
      "Converted 94_notebook_test.ipynb.\n",
      "Converted 95_index.ipynb.\n",
      "Converted 96_data_external.ipynb.\n",
      "Converted 97_utils_test.ipynb.\n",
      "Converted notebook2jekyll.ipynb.\n"
     ]
    }
   ],
   "source": [
    "#hide\n",
    "from nbdev.export import notebook2script\n",
    "notebook2script()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
